Importing data
countMatrix <- ReadDataFrameFromTsv(file.name.path="../data/refSEQ_countMatrix.txt")
## ../data/refSEQ_countMatrix.txt read from disk!
# head(countMatrix)
designMatrix <- ReadDataFrameFromTsv(file.name.path="../design/all_samples_short_names.tsv.csv")
## ../design/all_samples_short_names.tsv.csv read from disk!
# head(designMatrix)
rownames <- as.integer(rownames(countMatrix))
rownames <- rownames[order(rownames)]
rownames.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames, c("external_gene_name",
"mgi_symbol", "entrezgene"))
noNaCountMatrix <- attachGeneColumnToDf(mainDf=countMatrix,
genesMap=rownames.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
dim(noNaCountMatrix)
## [1] 20730 42
filteredCountsProp <- filterLowCounts(counts.dataframe=noNaCountMatrix,
is.normalized=FALSE,
design.dataframe=designMatrix,
cond.col.name="gcondition",
method.type="Proportion")
## features dimensions before normalization: 20730
## Filtering out low count features...
## 13580 features are to be kept for differential expression analysis with filtering method 3
Plot PCA of log unnormalized data
pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
Normalizations
Upper Quartile Normalization
normPropCountsUqua <- NormalizeData(data.to.normalize=filteredCountsProp,
norm.type="uqua",
design.matrix=designMatrix)
pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
edgeR DE Analysis
### edgering
source("../R/edgeRFunctions.R")
source("../R/VolcanoPlotFunctions.R")
source("../R/plotUtils.R")
source("../R/MAPlotFunctions.R")
source("../R/pvalHistPlotFunctions.R")
source("../R/plotUtils.R")
cc <- c("WTSD5 - WTHC5", "WTRS2 - WTHC7")
rescList1 <- applyEdgeR(counts=normPropCountsUqua, design.matrix=designMatrix,
factors.column="gcondition",
weight.columns=NULL,
contrasts=cc, useIntercept=FALSE, p.threshold=1,
verbose=TRUE)
WTSD5 - WTHC5
PlotHistPvalPlot(de.results=rescList1[[1]], design.matrix=designMatrix,
show.plot.flag=TRUE, plotly.flag=TRUE,
prefix.plot=names(rescList1)[1])
SD loading positive controls
sd.pos.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/SD_full_pos_control_genes_BMC_genomics.csv")
sd.pos.ctrls <- sd.pos.ctrls$MGI.Symbol
sd.pos.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
filter.values=sd.pos.ctrls, c("external_gene_name",
"mgi_symbol", "entrezgene"))
sd.pos.map.nna <- sd.pos.map[-which(is.na(sd.pos.map$entrezgene)),]
sd.pos.genes.entrez <- sd.pos.map.nna$entrezgene
## mapping ensembl gene id using biomart
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames(rescList1[[1]]),
c("external_gene_name", "mgi_symbol", "entrezgene"))
res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
genesMap=res.o.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
PlotVolcanoPlot(de.results=res.o, counts.dataframe=normPropCountsUqua, design.matrix=designMatrix,
show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05,
positive.ctrls.list=sd.pos.genes.entrez)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)
WTRS2 - WTHC7
PlotHistPvalPlot(de.results=rescList1[[2]], design.matrix=designMatrix,
show.plot.flag=TRUE, plotly.flag=TRUE,
prefix.plot=names(rescList1)[2])
RS loading positive controls
rs2.pos.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/RS2_pos_control_genes_BMC_genomics.csv")
rs2.pos.ctrls <- rs2.pos.ctrls$MGI.Symbol
rs2.pos.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
filter.values=rs2.pos.ctrls, c("external_gene_name",
"mgi_symbol", "entrezgene"))
rs2.pos.map.nna <- rs2.pos.map[-which(is.na(rs2.pos.map$entrezgene)),]
rs2.pos.genes.entrez <- rs2.pos.map.nna$entrezgene
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames(rescList1[[2]]),
c("external_gene_name", "mgi_symbol", "entrezgene"))
res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
genesMap=res.o.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
PlotVolcanoPlot(de.results=res.o, counts.dataframe=normPropCountsUqua, design.matrix=designMatrix,
show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05,
positive.ctrls.list=rs2.pos.genes.entrez)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05)